# -------------------------------------------------------------------
# Purpose: Creates Table B2 (written to tableB02.tex)
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate the table.
# -------------------------------------------------------------------
# Fail fast unless both the input-data and the output directories exist
stopifnot(
  dir.exists(pdataconfanalysis),
  dir.exists(poutputappendix)
)


# Panel A: surname-county level ----
# Bring the surname-county data set into the workspace
load(file.path(pdataconfanalysis, "surnameCountyLevel19001940.RData"))


# Keep only the reported variables, renamed to their display labels
temp <- select(
  surnameCountyLevel19001940,
  `Surname diversity` = entropy_namelast_mp_adjp_w,
  `Predicted surname diversity` = iv_lo_entropy_namelast_mp_adjp_fe_immig_w,
  `Population` = sum_n_namelast_mp_adjp_w,
  `Predicted population` = iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_w,
  `Patents per 1,000 people` = sum_patents_pc_1900_f_w,
  `Breakthrough patents per 1,000 people` = sum_break_p80_rrfsim05_pc_1900_f_w
)

# Destination .tex file for the table
tablename <- file.path(poutputappendix, "tableB02.tex")
options(modelsummary_format_numeric_latex = "plain")

# One row per variable; columns are N, Mean, SD, Min and Max
modelsummary::datasummary(
  `Surname diversity` +
    `Predicted surname diversity` +
    `Population` +
    `Predicted population` +
    `Patents per 1,000 people` +
    `Breakthrough patents per 1,000 people` ~
    N + Mean + SD + Min + Max,
  data = temp,
  output = tablename,
  fmt = comma3,
  align = "lccccc"
)


# Post-process the written file with the project's table helpers.
# NOTE(review): helper semantics are defined elsewhere; the calls below appear
# to insert the given LaTeX lines before the row matching the anchor text.
get_summary_stats_rows(tablename)
add_table_row(
  tablename,
  "Surname diversity &",
  c(
    "\\textit{\\textbf{Panel A: Surname-county level}} \\\\",
    "\\cmidrule(lr){1-6}",
    "\\textbf{Surname Diversity and Population} \\\\"
  ),
  where = "before"
)
add_table_row(
  tablename,
  "Patents per 1,000 people",
  "\\textbf{Patents and Breakthroughs} \\\\",
  where = "before"
)




# Panel B: inventor level ----
# Bring the inventor-level data set into the workspace
load(file.path(pdataconfanalysis, "inventorLevel19001940.RData"))


# Keep only the reported variables, renamed to their display labels
temp <- select(
  inventorLevel19001940,
  `Surname diversity` = entropy_namelast_mp_adjp_w,
  `Predicted surname diversity` = iv_lo_entropy_namelast_mp_adjp_fe_immig_w,
  `Population` = sum_n_namelast_mp_adjp_w,
  `Predicted population` = iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_w,
  `Patents` = sum_patents_f_w,
  `Breakthrough patents` = sum_break_p80_rrfsim05_f_w
)

# Scratch .tex file; its lines are spliced into `tablename` at the end
temptable <- file.path(poutputappendix, "temp.tex")
modelsummary::datasummary(
  `Surname diversity` +
    `Predicted surname diversity` +
    `Population` +
    `Predicted population` +
    `Patents` +
    `Breakthrough patents` ~
    N + Mean + SD + Min + Max,
  data = temp,
  output = temptable,
  fmt = comma3,
  align = "lccccc"
)

# Post-process the scratch file with the project's table helpers
# (helper semantics are defined elsewhere).
get_summary_stats_rows(temptable, 1)
# NOTE(review): the row labels are already set in select() above, so this
# substitution looks like a leftover -- confirm it still matches anything.
edit_table_content(
  temptable,
  "top 20\\\\% in terms of fsim5\\/bsim5",
  "Breakthrough patents"
)
# NOTE(review): presumably drops the column-header row (the one containing
# "SD"), since Panel A already provides it -- confirm against the helper.
remove_table_row(temptable, "SD")
add_table_row(
  temptable,
  "Surname diversity &",
  c(
    "\\\\",
    "\\textit{\\textbf{Panel B: Inventor level}} \\\\",
    "\\cmidrule(lr){1-6}",
    "\\textbf{Surname Diversity and Population} \\\\"
  ),
  where = "before"
)
add_table_row(
  temptable,
  "Patents",
  "\\textbf{Patents and Breakthroughs} \\\\",
  where = "before"
)

# Splice the finished panel into the main table, anchored on "bottomrule"
add_table_row(
  tablename,
  "bottomrule",
  readLines(temptable),
  where = "before"
)


# Panel C: patent level ----
# Bring the patent-level data set into the workspace
load(file.path(pdataconfanalysis, "patentLevel19001940.RData"))


# Keep only the reported variables, renamed to their display labels
temp <- select(
  patentLevel19001940,
  `Surname diversity of patent` = entropy_patent_namelast_mp_adjp_w,
  `Predicted surname diversity` = iv_lo_entropy_namelast_mp_adjp_fe_immig_w,
  `Team size` = no_of_inventors,
  `Technologies per patent` = n_uspto_techn,
  `Breakthrough patent indicator` = break_p80_rrfsim05
)

# Scratch .tex file; its lines are spliced into `tablename` below
temptable <- file.path(poutputappendix, "temp.tex")
modelsummary::datasummary(
  `Surname diversity of patent` +
    `Predicted surname diversity` +
    `Team size` +
    `Technologies per patent` +
    `Breakthrough patent indicator` ~
    N + Mean + SD + Min + Max,
  data = temp,
  output = temptable,
  fmt = comma3,
  align = "lccccc"
)

# Post-process the scratch file with the project's table helpers
# (helper semantics are defined elsewhere).
get_summary_stats_rows(temptable, 1)
# NOTE(review): the row labels are already set in select() above, so this
# substitution looks like a leftover -- confirm it still matches anything.
edit_table_content(
  temptable,
  "top 20\\\\% in terms of fsim5\\/bsim5",
  "Breakthrough patent indicator"
)
# NOTE(review): presumably drops the column-header row (the one containing
# "SD"), since Panel A already provides it -- confirm against the helper.
remove_table_row(temptable, "SD")
add_table_row(
  temptable,
  "Surname diversity of patent &",
  c(
    "\\\\",
    "\\textit{\\textbf{Panel C: Patent level}} \\\\",
    "\\cmidrule(lr){1-6}",
    "\\textbf{Surname Diversity and Team Size} \\\\"
  ),
  where = "before"
)
add_table_row(
  temptable,
  "Technologies per patent",
  "\\textbf{Patents and Breakthroughs} \\\\",
  where = "before"
)

# Splice the finished panel into the main table, anchored on "bottomrule"
add_table_row(
  tablename,
  "bottomrule",
  readLines(temptable),
  where = "before"
)

# Final touch on the main table: a "\\" line anchored on "toprule"
add_table_row(
  tablename,
  "toprule",
  "\\\\",
  where = "before"
)

# Delete the scratch file and report where the table was written
file.remove(temptable)
cat("Table B2 saved to:", tablename, "\n")